# Start the sglang server for the Qwen/Qwen2.5-VL-7B-Instruct model.
#   - CUDA_VISIBLE_DEVICES=1 exposes only GPU index 1 to the process.
#   - The server binds to all interfaces (0.0.0.0) on port 30001.
# NOTE(review): the 72B launch command in this file uses the same port and
# also includes GPU 1, so the two are presumably alternatives rather than
# meant to run side by side -- confirm.
CUDA_VISIBLE_DEVICES=1 \
  python3 -m sglang.launch_server \
  --model-path Qwen/Qwen2.5-VL-7B-Instruct \
  --grammar-backend outlines \
  --chat-template qwen2-vl \
  --tool-call-parser qwen25 \
  --host 0.0.0.0 \
  --port 30001

# Start the sglang server for the Qwen/Qwen2.5-VL-72B-Instruct model.
#   - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 exposes GPU indices 0-7.
#   - NCCL_P2P_DISABLE=1 is exported to the process; presumably set to work
#     around peer-to-peer transport problems on this host -- TODO confirm.
#   - --tp 8 presumably shards the model across the 8 visible GPUs, and
#     --disable-custom-all-reduce presumably falls back to the stock
#     all-reduce path -- verify against the sglang server arguments.
#   - The server binds to all interfaces (0.0.0.0) on port 30001.
# NOTE(review): the 7B launch command in this file uses the same port and
# GPU 1, so the two are presumably alternatives rather than meant to run
# side by side -- confirm.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
NCCL_P2P_DISABLE=1 \
  python3 -m sglang.launch_server \
  --model-path Qwen/Qwen2.5-VL-72B-Instruct \
  --grammar-backend outlines \
  --chat-template qwen2-vl \
  --tool-call-parser qwen25 \
  --host 0.0.0.0 \
  --port 30001 \
  --tp 8 \
  --disable-custom-all-reduce